library("phyloseq")
library("vegan")
library("DESeq2")
library("ggplot2")
library("dendextend")
library("tidyr")
library("viridis")
library("reshape")
library("metacoder")
library("dplyr")
library('readr')
library('stringr')
library('agricolae')
library('ape')
library('ggrepel')
library("RColorBrewer")
library('rstatix')
library('microeco')
library('ggalluvial')
library('iNEXT')
library('ggvenn')
library('gridExtra')
sessionInfo()
## R version 4.4.2 (2024-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 11 x64 (build 26100)
##
## Matrix products: default
##
##
## locale:
## [1] LC_COLLATE=English_Germany.utf8 LC_CTYPE=English_Germany.utf8
## [3] LC_MONETARY=English_Germany.utf8 LC_NUMERIC=C
## [5] LC_TIME=English_Germany.utf8
##
## time zone: Europe/Berlin
## tzcode source: internal
##
## attached base packages:
## [1] grid stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] gridExtra_2.3 ggvenn_0.1.10
## [3] iNEXT_3.0.1 ggalluvial_0.12.5
## [5] microeco_1.12.0 rstatix_0.7.2
## [7] RColorBrewer_1.1-3 ggrepel_0.9.6
## [9] ape_5.8-1 agricolae_1.3-7
## [11] stringr_1.5.1 readr_2.1.5
## [13] dplyr_1.1.4 metacoder_0.3.7
## [15] reshape_0.8.9 viridis_0.6.5
## [17] viridisLite_0.4.2 tidyr_1.3.1
## [19] dendextend_1.19.0 ggplot2_3.5.1
## [21] DESeq2_1.46.0 SummarizedExperiment_1.36.0
## [23] Biobase_2.66.0 MatrixGenerics_1.18.1
## [25] matrixStats_1.4.1 GenomicRanges_1.58.0
## [27] GenomeInfoDb_1.42.1 IRanges_2.40.1
## [29] S4Vectors_0.44.0 BiocGenerics_0.52.0
## [31] vegan_2.6-8 lattice_0.22-6
## [33] permute_0.9-7 phyloseq_1.50.0
##
## loaded via a namespace (and not attached):
## [1] rlang_1.1.4 magrittr_2.0.3 ade4_1.7-22
## [4] compiler_4.4.2 mgcv_1.9-1 vctrs_0.6.5
## [7] reshape2_1.4.4 pkgconfig_2.0.3 crayon_1.5.3
## [10] fastmap_1.2.0 backports_1.5.0 XVector_0.46.0
## [13] rmarkdown_2.29 tzdb_0.4.0 UCSC.utils_1.2.0
## [16] purrr_1.0.2 xfun_0.49 zlibbioc_1.52.0
## [19] cachem_1.1.0 jsonlite_1.8.9 biomformat_1.34.0
## [22] rhdf5filters_1.18.0 DelayedArray_0.32.0 Rhdf5lib_1.28.0
## [25] BiocParallel_1.40.0 broom_1.0.7 parallel_4.4.2
## [28] cluster_2.1.8 R6_2.5.1 bslib_0.8.0
## [31] stringi_1.8.4 car_3.1-3 jquerylib_0.1.4
## [34] Rcpp_1.0.13-1 iterators_1.0.14 knitr_1.49
## [37] Matrix_1.7-1 splines_4.4.2 igraph_2.1.2
## [40] tidyselect_1.2.1 rstudioapi_0.17.1 abind_1.4-8
## [43] yaml_2.3.10 AlgDesign_1.2.1.1 codetools_0.2-20
## [46] tibble_3.2.1 plyr_1.8.9 withr_3.0.2
## [49] evaluate_1.0.3 survival_3.8-3 Biostrings_2.74.1
## [52] pillar_1.10.1 carData_3.0-5 foreach_1.5.2
## [55] generics_0.1.3 hms_1.1.3 munsell_0.5.1
## [58] scales_1.3.0 glue_1.8.0 tools_4.4.2
## [61] data.table_1.16.4 locfit_1.5-9.10 rhdf5_2.50.2
## [64] colorspace_2.1-1 nlme_3.1-166 GenomeInfoDbData_1.2.13
## [67] Formula_1.2-5 cli_3.6.3 S4Arrays_1.6.0
## [70] gtable_0.3.6 sass_0.4.9 digest_0.6.37
## [73] SparseArray_1.6.0 htmltools_0.5.8.1 multtest_2.62.0
## [76] lifecycle_1.0.4 httr_1.4.7 MASS_7.3-64
#reading the data
count_tab <-read.table("Raw_Counts.txt", header=T, row.names=1, check.names=F)
sample_info <- read.table("Sample_Info.txt", header=T, row.names=1, check.names=F)
tax_tab<- read.delim("Taxonomy.txt")
tax_tab$OTU[duplicated(tax_tab$OTU)] # check for duplicated OTUs
## character(0)
count_tab <- count_tab[rownames(count_tab) %in% tax_tab$OTU, ]
# sum of NTC counts for each OTU
NTC <-rowSums(count_tab[, 1:6])
# sum of sample counts for each OTU
sample_counts <- rowSums(count_tab[, 7:90])
# normalize sample counts by dividing the samples' total by 12 – as there are 12 as many samples (84) as there are NTCs (6) (84/6=14)
norm_sample_OTU_counts <- sample_counts/14
# which OTUs are deemed likely contaminants based on the threshold noted above:
blank_OTUs <- names(NTC[NTC * 2 > norm_sample_OTU_counts]) # OTUs which have more counts than 1/2 of the sample counts
length(blank_OTUs) # this approach identified 4 out of ~283 OTUs that are likely to have originated from contamination
## [1] 4
# looking at the percentage of reads retained for each sample after removing these presumed contaminant OTUs
colSums(count_tab[!rownames(count_tab) %in% blank_OTUs, ]) / colSums(count_tab) * 100
## NTC NTC NTC NTC NTC NTC P2 D26
## 47.33096 100.00000 42.47967 100.00000 100.00000 49.33333 100.00000 99.99846
## P27 P6 D6 V6 S6 V4 P9 S22
## 100.00000 100.00000 100.00000 100.00000 100.00000 100.00000 100.00000 100.00000
## D2 P30A V9 S13 S30A D9 D13 V13
## 100.00000 100.00000 100.00000 99.99891 99.99435 100.00000 100.00000 100.00000
## V23 V17 V16 D16 S17 P22 P17 D17
## 100.00000 100.00000 100.00000 100.00000 100.00000 99.99788 100.00000 99.99230
## S9 P10 D22 S4 P5 P23 D23 V2
## 100.00000 100.00000 100.00000 100.00000 100.00000 100.00000 99.99784 100.00000
## V2_3 S2_2 D24 V24 V5 S24 P26 S31
## 100.00000 99.99878 100.00000 100.00000 100.00000 100.00000 100.00000 99.99889
## S27 S2 V27 S26 V10 V2_2 V30B V78
## 100.00000 100.00000 100.00000 100.00000 99.95886 100.00000 100.00000 100.00000
## V30A P16 D30B S10 D30A S78 D31 P13
## 100.00000 100.00000 100.00000 100.00000 99.99615 100.00000 100.00000 100.00000
## D4 D5 P76 D76 V76 S30B P31 D78
## 99.99815 100.00000 100.00000 100.00000 100.00000 99.99704 100.00000 100.00000
## S76 P78 P4 P2_2 P2_3 D2_2 D27 S23
## 99.99370 100.00000 100.00000 100.00000 100.00000 99.99735 100.00000 100.00000
## P24 S2_3 V31 P30B V26 V22 D10 S16
## 100.00000 100.00000 100.00000 100.00000 100.00000 99.97795 99.99355 99.99753
## D2_3 S5
## 99.99515 100.00000
filt_count_tab <- count_tab[!rownames(count_tab) %in% blank_OTUs, -c(1:6)] # removing blank_OTUs and the blank samples from further analysis
filt_count_tab <- filt_count_tab[rowSums(filt_count_tab == 0) != ncol(filt_count_tab), ] # delete OTUs with 0 counts -> 278 OTUs left
#reorder the row and column names
sample_info<-sample_info[order(rownames(sample_info)), ]
filt_count_tab<- filt_count_tab[,order(names(filt_count_tab))]
#add 1 to every count, so it is possible to calculate the geometric mean later
filt_count_tab1 <- filt_count_tab + 1
# create a DESeq2 object
deseq_counts <- DESeqDataSetFromMatrix(filt_count_tab1, colData = sample_info, design = ~Species)
## converting counts to integer mode
deseq_counts_vst <- varianceStabilizingTransformation(deseq_counts, fitType='local')
#save as a matrix
vst_trans_count_tab <- assay(deseq_counts_vst)
#transform lowest values (absent counts) to 0
vst_trans_count_tab <- sweep(vst_trans_count_tab, 2, apply(vst_trans_count_tab, 2, min), "-")
sample_info$color[sample_info$Species == "Pinus"] <- "dodgerblue3"
sample_info$color[sample_info$Species == "Diphasiastrum"] <- "darkorange1"
sample_info$color[sample_info$Species == "Vaccinium"] <- "purple4"
sample_info$color[sample_info$Species == "Soil"] <- "hotpink2"
sample_info_no_rep <- read.table("Sample_Info_noR.txt",header=T, row.names=1, check.names=F)
sample_info_no_rep<-sample_info_no_rep[order(rownames(sample_info_no_rep)), ]
sample_info_no_rep$color[sample_info_no_rep$Species == "Pinus"] <- "dodgerblue3"
sample_info_no_rep$color[sample_info_no_rep$Species == "Diphasiastrum"] <- "darkorange1"
sample_info_no_rep$color[sample_info_no_rep$Species == "Vaccinium"] <- "purple4"
sample_info_no_rep$color[sample_info_no_rep$Species == "Soil"] <- "hotpink2"
#remove technical replicates:
Venn_OTU <- as.data.frame(vst_trans_count_tab)
Venn_OTU <- (Venn_OTU[, !grepl("_", colnames(Venn_OTU))])
Venn_S <- grepl("S", names(Venn_OTU))
OTU_S <- rownames(Venn_OTU)[rowSums(Venn_OTU[, Venn_S]) != 0]
Venn_P <- grepl("P", names(Venn_OTU))
OTU_P <- rownames(Venn_OTU)[rowSums(Venn_OTU[, Venn_P]) != 0]
Venn_V <- grepl("V", names(Venn_OTU))
OTU_V <- rownames(Venn_OTU)[rowSums(Venn_OTU[, Venn_V]) != 0]
Venn_D <- grepl("D", names(Venn_OTU))
OTU_D <- rownames(Venn_OTU)[rowSums(Venn_OTU[, Venn_V]) != 0]
venn <- list("Soil" = OTU_S, "PINsyl" = OTU_P, "VACmyr" = OTU_V, "DIPHcom" = OTU_D)
ggvenn(venn, fill_color = c("hotpink2", "dodgerblue3","purple4","darkorange1"),set_name_size = 4)
#write the table to use it in jvenn tool: https://jvenn.toulouse.inrae.fr/app/index.html
#max_length <- max(length(OTU_S), length(OTU_P), length(OTU_V), length(OTU_D))
#OTU_S <- c(OTU_S, rep(NA, max_length - length(OTU_S)))
#OTU_P <- c(OTU_P, rep(NA, max_length - length(OTU_P)))
#OTU_V <- c(OTU_V, rep(NA, max_length - length(OTU_V)))
#OTU_D <- c(OTU_D, rep(NA, max_length - length(OTU_D)))
#Venn_df <- data.frame(OTU_S, OTU_P, OTU_V, OTU_D)
#write_tsv(Venn_df, "venn")
# create phyloseq object with normalized data
vst_count_phy <- otu_table(vst_trans_count_tab, taxa_are_rows=T)
sample_info_tab_phy <- sample_data(sample_info)
vst_physeq <- phyloseq(vst_count_phy, sample_info_tab_phy)
#generating and visualizing the PCoA with phyloseq
vst_pcoa <- ordinate(vst_physeq, method="PCoA", distance="euclidean")
eigen_vals <- vst_pcoa$values$Eigenvalues # allows us to scale the axes according to their magnitude of separating apart the samples
pca<-plot_ordination(vst_physeq, vst_pcoa, color="Species") +
labs(col="Species") + geom_point(size=1) +
geom_text_repel(aes(label=rownames(sample_info)), size =3, force = 5, max.overlaps = 50) +
coord_fixed(sqrt(eigen_vals[2]/eigen_vals[1])) + ggtitle("PCoA") +
scale_color_manual(values=unique(sample_info$color[order(sample_info$Species)])) +
theme(legend.position="none")
pca
Samples labelled as “S2”, “S2_2” and “S2_3” - three technical replicates of the soil sample S2. Same for D2, P2 and V2.
vst_count_phy <- otu_table(vst_trans_count_tab, taxa_are_rows=T) #count data
sample_info_tab_phy <- sample_data(sample_info_no_rep) # sample data
tax_tab_physeq_all <- tax_tab[tax_tab$OTU %in% rownames(vst_trans_count_tab), ]
tax_tab_physeq_matrix <- as.matrix(tax_tab_physeq_all[, c("OTU", "Phylum", "Class", "Order")])
rownames(tax_tab_physeq_matrix) <- tax_tab_physeq_all[[1]]
tax_matrix<- tax_table(tax_tab_physeq_matrix) # OTU data
vst_physeq <- merge_phyloseq(vst_count_phy, sample_info_tab_phy, tax_matrix)
vst_pcoa <- ordinate(vst_physeq, method="PCoA", distance = "euclidean")
eigen_vals <- vst_pcoa$values$Eigenvalues
pcoa<-plot_ordination(vst_physeq, vst_pcoa, color="Species") +
labs(col="Species") + geom_point(size=1) +
coord_fixed(sqrt(eigen_vals[2]/eigen_vals[1])) + ggtitle("PCoA: All OTUs") +
scale_color_manual(values=unique(sample_info_no_rep$color[order(sample_info_no_rep$Species)]))
vst_pcoa <- ordinate(vst_physeq, method = "NMDS", distance = "bray")
## Wisconsin double standardization
## Run 0 stress 0.197018
## Run 1 stress 0.1979774
## Run 2 stress 0.1990485
## Run 3 stress 0.2008448
## Run 4 stress 0.1968931
## ... New best solution
## ... Procrustes: rmse 0.01108067 max resid 0.06254305
## Run 5 stress 0.1990901
## Run 6 stress 0.1977946
## Run 7 stress 0.1979634
## Run 8 stress 0.2215886
## Run 9 stress 0.1979773
## Run 10 stress 0.1990486
## Run 11 stress 0.2215838
## Run 12 stress 0.1970252
## ... Procrustes: rmse 0.01105955 max resid 0.06280142
## Run 13 stress 0.2217747
## Run 14 stress 0.1968822
## ... New best solution
## ... Procrustes: rmse 0.001338591 max resid 0.006189816
## ... Similar to previous best
## Run 15 stress 0.2127896
## Run 16 stress 0.2127939
## Run 17 stress 0.1990562
## Run 18 stress 0.1970063
## ... Procrustes: rmse 0.004348012 max resid 0.03342215
## Run 19 stress 0.22015
## Run 20 stress 0.2252388
## *** Best solution repeated 1 times
nmds<- plot_ordination(vst_physeq, vst_pcoa, color="Species", title="NMDS: All OTUs", type ="sample") +
scale_color_manual(values=unique(sample_info_no_rep$color[order(sample_info_no_rep$Species)]))
# select only ECM OTUs:
ECM_OTUs <- tax_tab_physeq_all$OTU[tax_tab_physeq_all$Guild %in% c("Ectomycorrhizal")]
ECM_vst_trans_count_tab <- vst_trans_count_tab[ECM_OTUs, ]
vst_count_phy <- otu_table(ECM_vst_trans_count_tab, taxa_are_rows=T)
sample_info_tab_phy <- sample_data(sample_info_no_rep)
vst_physeq <- merge_phyloseq(vst_count_phy, sample_info_tab_phy)
vst_pcoa <- ordinate(vst_physeq, method = "NMDS",distance = "bray")
## Wisconsin double standardization
## Run 0 stress 0.2822805
## Run 1 stress 0.3115204
## Run 2 stress 0.2878505
## Run 3 stress 0.2863862
## Run 4 stress 0.2811718
## ... New best solution
## ... Procrustes: rmse 0.08256871 max resid 0.2862336
## Run 5 stress 0.2919735
## Run 6 stress 0.2898905
## Run 7 stress 0.2918868
## Run 8 stress 0.2851065
## Run 9 stress 0.2985755
## Run 10 stress 0.2806006
## ... New best solution
## ... Procrustes: rmse 0.03352879 max resid 0.1503356
## Run 11 stress 0.2882571
## Run 12 stress 0.2897858
## Run 13 stress 0.2824357
## Run 14 stress 0.29074
## Run 15 stress 0.2882046
## Run 16 stress 0.2842404
## Run 17 stress 0.2914153
## Run 18 stress 0.2972009
## Run 19 stress 0.2950679
## Run 20 stress 0.2869468
## *** Best solution was not repeated -- monoMDS stopping criteria:
## 2: no. of iterations >= maxit
## 18: stress ratio > sratmax
nmds_ECM<-plot_ordination(vst_physeq, vst_pcoa, color="Species", title="NMDS: Ectomycorrhizal", type = "sample")+
scale_color_manual(values=unique(sample_info_no_rep$color[order(sample_info_no_rep$Species)]))
# select only Saprotrophic OTUs:
Sapro_OTUs <- tax_tab_physeq_all$OTU[tax_tab_physeq_all$Guild %in% c("Saprotroph")]
Sapro_vst_trans_count_tab <- vst_trans_count_tab[Sapro_OTUs, ]
vst_count_phy <- otu_table(Sapro_vst_trans_count_tab, taxa_are_rows=T)
sample_info_tab_phy <- sample_data(sample_info_no_rep)
vst_physeq <- merge_phyloseq(vst_count_phy, sample_info_tab_phy)
#generating and visualizing the PCoA with phyloseq
vst_pcoa <- ordinate(vst_physeq, method = "NMDS",distance = "bray")
## Wisconsin double standardization
## Run 0 stress 0.2039362
## Run 1 stress 0.2158043
## Run 2 stress 0.2038225
## ... New best solution
## ... Procrustes: rmse 0.006115275 max resid 0.03270896
## Run 3 stress 0.2038152
## ... New best solution
## ... Procrustes: rmse 0.0003874135 max resid 0.001638865
## ... Similar to previous best
## Run 4 stress 0.2277129
## Run 5 stress 0.206158
## Run 6 stress 0.2039516
## ... Procrustes: rmse 0.006299264 max resid 0.03366575
## Run 7 stress 0.204362
## Run 8 stress 0.2061578
## Run 9 stress 0.2037455
## ... New best solution
## ... Procrustes: rmse 0.003662047 max resid 0.02857684
## Run 10 stress 0.2057254
## Run 11 stress 0.2061594
## Run 12 stress 0.2275107
## Run 13 stress 0.2160111
## Run 14 stress 0.2328631
## Run 15 stress 0.2038152
## ... Procrustes: rmse 0.003662233 max resid 0.02856119
## Run 16 stress 0.2040974
## ... Procrustes: rmse 0.006520555 max resid 0.04329109
## Run 17 stress 0.2156937
## Run 18 stress 0.2059085
## Run 19 stress 0.2037454
## ... New best solution
## ... Procrustes: rmse 0.0001252684 max resid 0.0007076332
## ... Similar to previous best
## Run 20 stress 0.2164165
## *** Best solution repeated 1 times
nmds_Sapro<-plot_ordination(vst_physeq, vst_pcoa, color="Species", title="NMDS: Saprotrophs", type = "sample")+
scale_color_manual(values=unique(sample_info_no_rep$color[order(sample_info_no_rep$Species)]))
grid.arrange(nmds,nmds_ECM, nmds_Sapro,pcoa, nrow = 2)
#read info table with the samples without replicates
sample_info_no_rep <- read.table("Sample_Info_noR.txt", header=T, row.names=1, check.names=F)
tax_table<-tax_tab
row.names(tax_table) <- tax_table$OTU
tax_table <- tax_table[, -1]
tax_table<- tax_table[order(rownames(tax_table)),]
counts_norm_df<-as.data.frame(vst_trans_count_tab)
# create microeco file
meco_fungi <- microtable$new(sample_table = sample_info_no_rep, otu_table = counts_norm_df, tax_table = tax_table)
# color palette
mycol <- c("#A6CEE3" ,"#1F78B4" ,"#B2DF8A" ,"#33A02C" ,"#FB9A99", "#E31A1C" ,"#FDBF6F" ,"#FF7F00" ,"#CAB2D6" ,"#6A3D9A" ,"#FFFF99" ,"#B15928" , "#6fdc8c", "green4", "#004144")
t1 <- trans_abund$new(dataset = meco_fungi, taxrank = "Phylum", ntaxa = 15, groupmean = "Species")
Phylum <- t1$plot_bar(bar_full = FALSE, xtext_keep = TRUE, xtext_angle = 90,xtext_size = 6, color_values = mycol)
t1 <- trans_abund$new(dataset = meco_fungi, taxrank = "Order", ntaxa = 15, groupmean = "Species")
Order <- t1$plot_bar(bar_full = FALSE,xtext_keep = TRUE, xtext_angle = 90,xtext_size = 6, color_values = mycol)
t1 <- trans_abund$new(dataset = meco_fungi, taxrank = "Class", ntaxa = 15, groupmean = "Species")
Class <- t1$plot_bar(bar_full = FALSE, xtext_keep = TRUE, xtext_angle = 90, xtext_size = 6, color_values = mycol)
t1 <- trans_abund$new(dataset = meco_fungi, taxrank = "Family", ntaxa = 15, groupmean = "Species")
Family<-t1$plot_bar(bar_full = FALSE, xtext_keep = TRUE, xtext_angle = 90,xtext_size = 6, color_values = mycol)
t1 <- trans_abund$new(dataset = meco_fungi, taxrank = "Genus", ntaxa = 15, groupmean = "Species")
Genus<-t1$plot_bar(bar_full = FALSE, xtext_keep = TRUE, xtext_angle = 90,xtext_size = 6, color_values = mycol)
grid.arrange(Phylum, Order, Class, Family, Genus, nrow = 2)
t1 <- trans_abund$new(dataset = meco_fungi, taxrank = "Class", ntaxa = 10)
## The transformed abundance data is stored in object$data_abund ...
t1$plot_bar(bar_type = "notfull", use_alluvium = T, xtext_keep = TRUE, xtext_angle = 90,facet = "Location",xtext_size = 6, color_values = RColorBrewer::brewer.pal(10, "Set3"))
## Warning: The `bar_type` argument of `plot_bar()` is deprecated as of microeco 1.7.0.
## ℹ Please use the `bar_full` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
“-” = not determined
https://grunwaldlab.github.io/metacoder_documentation/workshop--04--manipulating.html
tax_data <-tax_tab
tax_data <- tax_data[tax_data$OTU %in% rownames(filt_count_tab), ] # delete the blank OTUs
counts_norm_df2<-counts_norm_df
row_names <- rownames(counts_norm_df)
rownames(counts_norm_df2) <- NULL
df <- cbind(OTU= row_names, counts_norm_df2) # Add row names as the first column
# Read sample info without technical replicates
sample_data <-read.table("Sample_Info_noR.txt", sep = "\t", header=T, check.names=F)
# Combine the tables sorting by OTU
Tax_otu_data <- left_join(df, tax_data, by = "OTU" )
#converting to taxmap format
obj <- parse_tax_data(Tax_otu_data,
class_cols = "taxonomy",
class_sep = ";")
#remove unclassified taxa
obj <- filter_taxa(obj, taxon_names != "-")
set.seed(50)
heattree<-heat_tree(obj, node_label = gsub(pattern = "\\[|\\]", replacement = "", taxon_names),
node_size = n_obs,
node_size_range = c(0.01, 0.04),
node_color = n_obs,
node_color_range = c("#C9E4CA", "#87BBA2", "#55828B", "#3B6064", "#364958"),
edge_label = n_obs,
edge_label_size = 0.015,
node_label_size_range = c(0.01, 0.015),
node_color_axis_label = "OTUs",
layout = "davidson-harel", initial_layout = "reingold-tilford")
plot(heattree)
tax_data <-tax_tab
tax_data <- tax_data[tax_data$OTU %in% rownames(filt_count_tab), ] # delete the blank OTUs
counts_norm_df2<-counts_norm_df
rownames(counts_norm_df2) <- NULL
df <- cbind(OTU= row_names, counts_norm_df2) # Add row names as the first column
sample_data <-read.table("Sample_Info_noR.txt", sep = "\t", header=T, check.names=F)
# Combine the tables sorting by OTU
Tax_otu_data <- left_join(df, tax_data, by = "OTU" )
Tax_otu_data <- subset(Tax_otu_data , select = -grep("_", names(Tax_otu_data ))) #delete technical replicates
#converting to taxmap format
obj <- parse_tax_data(Tax_otu_data,
class_cols = "taxonomy", # The column in the input table
class_sep = ";") # What each taxon is separated by
#remove "-" (unclassified taxa):
obj <- filter_taxa(obj, taxon_names != "-")
#For the heat tree comparison of abundances
obj$data$tax_abund <- calc_taxon_abund(obj, "tax_data", cols = sample_data$Sample)
## Summing per-taxon counts from 76 columns for 341 taxa
obj$data$tax_occ <- calc_n_samples(obj, "tax_abund", groups = sample_data$Species, cols = sample_data$Sample)
## Calculating number of samples with a value greater than 0 for 76 columns in 4 groups for 341 observations
obj$data$diff_table <- compare_groups(obj, data= "tax_abund",
cols = sample_data$Sample, # What columns of sample data to use
groups = sample_data$Species) # What category each sample is assigned to
#We then need to correct for multiple comparisons and set non-significant differences to zero:
obj <- mutate_obs(obj, "diff_table",
wilcox_p_value = p.adjust(wilcox_p_value, method = "fdr")) #false discovery rate
obj$data$diff_table$log2_median_ratio[obj$data$diff_table$wilcox_p_value > 0.05] <- 0
set.seed(50)
heattree<-heat_tree_matrix(obj, key_size = 0.65,
data = "diff_table",
node_size = n_obs, # the number of OTUs per taxon
node_label_size_range = c(0.01, 0.035),
node_label = taxon_names,
node_color = log2_median_ratio,
node_color_range = diverging_palette(),
node_color_trans = "linear",
node_color_interval = c(-2, 2), # The range of `log2_median_ratio`
edge_color_interval = c(-2, 2), # The range of `log2_median_ratio`
node_size_axis_label = "Number of OTUs",
node_color_axis_label = "Log2 ratio median proportions",
layout = "davidson-harel", # The primary layout algorithm
initial_layout = "reingold-tilford") # The layout algorithm that initializes node locations
plot(heattree)
tax_data <-tax_tab
tax_data <- tax_data[tax_data$OTU %in% rownames(filt_count_tab), ] # delete the blank OTUs
counts_norm_df2<-counts_norm_df
rownames(counts_norm_df2) <- NULL
df <- cbind(OTU= row_names, counts_norm_df2) # Add row names as the first column
sample_data <-read.table("Sample_Info_noR.txt", sep = "\t", header=T, check.names=F)
# Combine the tables sorting by OTU
Tax_otu_data <- left_join(df, tax_data, by = "OTU" )
Tax_otu_data <- subset(Tax_otu_data , select = -grep("_", names(Tax_otu_data ))) #delete technical replicates
#converting to taxmap format
obj <- parse_tax_data(Tax_otu_data,
class_cols = "taxonomy", # The column in the input table
class_sep = ";") # What each taxon is separated by
#remove "-" (unclassified taxa) and rare taxa:
obj <- filter_taxa(obj, taxon_names != "-")
obj <- filter_taxa(obj, n_obs > 1)
#For the heat tree comparison of abundances
obj$data$tax_abund <- calc_taxon_abund(obj, "tax_data", cols = sample_data$Sample)
## Summing per-taxon counts from 76 columns for 118 taxa
obj$data$tax_occ <- calc_n_samples(obj, "tax_abund", groups = sample_data$Species, cols = sample_data$Sample)
## Calculating number of samples with a value greater than 0 for 76 columns in 4 groups for 118 observations
obj$data$diff_table <- compare_groups(obj, data= "tax_abund",
cols = sample_data$Sample, # What columns of sample data to use
groups = sample_data$Species) # What category each sample is assigned to
#We then need to correct for multiple comparisons and set non-significant differences to zero:
obj <- mutate_obs(obj, "diff_table",
wilcox_p_value = p.adjust(wilcox_p_value, method = "fdr")) #false discovery rate
obj$data$diff_table$log2_median_ratio[obj$data$diff_table$wilcox_p_value > 0.05] <- 0
set.seed(50)
heattree<-heat_tree_matrix(obj, key_size = 0.65,
data = "diff_table",
node_size = n_obs, # the number of OTUs per taxon
node_label_size_range = c(0.02, 0.04),
node_label = taxon_names,
node_color = log2_median_ratio,
node_color_range = diverging_palette(),
node_color_trans = "linear",
node_color_interval = c(-2, 2), # The range of `log2_median_ratio`
edge_color_interval = c(-2, 2), # The range of `log2_median_ratio`
node_size_axis_label = "Number of OTUs",
node_color_axis_label = "Log2 ratio median proportions",
layout = "davidson-harel", # The primary layout algorithm
initial_layout = "reingold-tilford") # The layout algorithm that initializes node locations
plot(heattree)
# I used normalized counts after NTCs treatments and without technical replicates
count_tab <- counts_norm_df[, unlist(lapply(counts_norm_df, is.numeric))]
row.names(count_tab) <- count_tab$OTU.ID
count_tab <- subset(count_tab, select = -grep("_", names(count_tab))) #delete technical replicates
count_tab <- count_tab > 0
# get OTU incidence data for all plots separately
inc_P <- count_tab[,grepl("P", colnames(count_tab))]
inc_D <- count_tab[,grepl("D", colnames(count_tab))]
inc_V <- count_tab[,grepl("V", colnames(count_tab))]
inc_S <- count_tab[,grepl("S", colnames(count_tab))]
# create a dataframe with incidence data summed by rows within plots
inc_byspecies <- cbind(rowSums(inc_P), rowSums(inc_D), rowSums(inc_V),
rowSums(inc_S))
colnames(inc_byspecies) <- c("Pinus", "Diphasiastrum", "Vaccinium", "Soil")
inc_byspecies <- rbind(c(length(colnames(inc_P)),
length(colnames(inc_D)),
length(colnames(inc_V)),
length(colnames(inc_S))), inc_byspecies)
inc_byspecies<-as.data.frame(inc_byspecies)
# Get the diversity estimates
out <- iNEXT(inc_byspecies, q=c(0,1,2), datatype ="incidence_freq", se = TRUE)
# Plot the diversity estimates
# Sample-size-based rarefaction and extrapolation (R/E) curve (type=1). This curve plots diversity estimates with 95% confidence intervals (if se=TRUE) as a function of sample size up to double the reference sample size, by default, or a user‐specified endpoint:
ggiNEXT(out, type=1, facet.var="Order.q", se = TRUE, color.var="Assemblage")
Hill number = effective number of species/OTUs. Hill numbers determines the measures’ sensitivity to species/OTUs relative abundances. Hill numbers include the three most widely used species diversity measures as special cases: species richness (q = 0), Shannon diversity (q = 1, as the effective number of common species/OTUs in the assemblage) and Simpson diversity (q = 2, effective number of dominant species/OTUs in the assemblage)
ggiNEXT(out, type=2, facet.var="None", color.var="Assemblage", se = TRUE)
Sample completeness curve (type=2) with confidence intervals (if se=TRUE): see Figs. 1b and 2b in Hsieh et al. (2016). This curve plots the sample coverage with respect to sample size for the same range described in (1).
ggiNEXT(out, type=3, facet.var="Order.q", color.var="Assemblage", se =TRUE)
Coverage-based R/E curve (type=3): see Figs. 1c and 2c in Hsieh et al. (2016). This curve plots the diversity estimates with confidence intervals (if se=TRUE) as a function of sample coverage up to the maximum coverage obtained from the maximum size described in (1)
# select only ECM OTUs:
matching_rows <- tax_tab$OTU[tax_tab$Guild %in% c("Ectomycorrhizal")]
# Subset the matrix based on the matching row names
filtered_vs_trans_count_tab <- vst_trans_count_tab[matching_rows, ]
Symbio_df <- as.data.frame(filtered_vs_trans_count_tab)
#I used normalized counts of reads after removing of OTUs found in NTCs and technical replicates
count_tab <- Symbio_df[, unlist(lapply(Symbio_df, is.numeric))]
row.names(count_tab) <- count_tab$OTU.ID
count_tab <- subset(count_tab, select = -grep("_", names(count_tab))) #delete technical replicates
count_tab <- count_tab > 0
inc_P <- count_tab[,grepl("P", colnames(count_tab))] # get OTU incidence data for all plots separately
inc_D <- count_tab[,grepl("D", colnames(count_tab))]
inc_V <- count_tab[,grepl("V", colnames(count_tab))]
inc_S <- count_tab[,grepl("S", colnames(count_tab))]
# create a dataframe with incidence data summed by rows within plots
inc_byspecies <- cbind(rowSums(inc_P), rowSums(inc_D), rowSums(inc_V),
rowSums(inc_S))
colnames(inc_byspecies) <- c("Pinus", "Diphasiastrum", "Vaccinium", "Soil")
inc_byspecies <- rbind(c(length(colnames(inc_P)),
length(colnames(inc_D)),
length(colnames(inc_V)),
length(colnames(inc_S))), inc_byspecies)
inc_byspecies<-as.data.frame(inc_byspecies)
# Get the diversity estimates
out <- iNEXT(inc_byspecies, q=c(0,1,2), datatype ="incidence_freq", se = TRUE)
# Plot the diversity estimates
ggiNEXT(out, type=1, facet.var="Order.q", se = TRUE, color.var="Assemblage")
Sample-size-based rarefaction and extrapolation (R/E) curve (type=1). This curve plots diversity estimates with 95% confidence intervals (if se=TRUE) as a function of sample size up to double the reference sample size, by default, or a user‐specified endpoint:
ggiNEXT(out, type=2, facet.var="None", color.var="Assemblage", se = TRUE)
#read info table with the samples without replicates
sample_info_no_rep <- read.table("Sample_Info_noR.txt", header=T, row.names=1, check.names=F)
tax_table<-tax_tab
row.names(tax_table) <- tax_table$OTU
tax_table <- tax_table[, -1]
tax_table<- tax_table[order(rownames(tax_table)),]
# create a data frame from normalized dataset
counts_norm_df<-as.data.frame(vst_trans_count_tab)
row_names <- rownames(counts_norm_df)
counts_norm_df <-as.data.frame(counts_norm_df)
rownames(counts_norm_df) <- row_names
# create microeco file
meco_fungi <- microtable$new(sample_table = sample_info_no_rep, otu_table = counts_norm_df, tax_table = tax_table)
t1t <- trans_abund$new(dataset = meco_fungi, taxrank = "Trophic_Mode", ntaxa = 10)
## No taxa_abund list found. Calculate relative abundance with cal_abund function ...
## The result is stored in object$taxa_abund ...
## The transformed abundance data is stored in object$data_abund ...
scolor<-c("darkorange1","dodgerblue3","hotpink2", "purple4")
Trophic_Mode <- t1t$plot_box(group = "Species", xtext_angle = 30, color_values = scolor) + ylab("Relative abundance (%)")
Trophic_Mode
abund_t<-t1t$data_abund
shapiro.test(abund_t[abund_t$Taxonomy == "Saprotroph",]$Abundance) # data normally distributed
##
## Shapiro-Wilk normality test
##
## data: abund_t[abund_t$Taxonomy == "Saprotroph", ]$Abundance
## W = 0.98623, p-value = 0.5848
bartlett.test(Abundance ~ Species, abund_t[abund_t$Taxonomy == "Saprotroph",]) # variances are equal
##
## Bartlett test of homogeneity of variances
##
## data: Abundance by Species
## Bartlett's K-squared = 5.5208, df = 3, p-value = 0.1374
TukeySapro <- TukeyHSD(aov(Abundance ~ Species, abund_t[abund_t$Taxonomy == "Saprotroph",]))
TukeySapro
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Abundance ~ Species, data = abund_t[abund_t$Taxonomy == "Saprotroph", ])
##
## $Species
## diff lwr upr p adj
## Pinus-Diphasiastrum -4.043536 -6.9354675 -1.1516037 0.0024935
## Soil-Diphasiastrum -1.281917 -4.1738493 1.6100145 0.6501675
## Vaccinium-Diphasiastrum -2.397722 -5.2896541 0.4942098 0.1384604
## Soil-Pinus 2.761618 -0.1303138 5.6535501 0.0665675
## Vaccinium-Pinus 1.645813 -1.2461185 4.5377454 0.4447497
## Vaccinium-Soil -1.115805 -4.0077367 1.7761272 0.7413522
abund_t<-t1t$data_abund
shapiro.test(abund_t[abund_t$Taxonomy == "Symbiotroph",]$Abundance) # data not normally distributed
##
## Shapiro-Wilk normality test
##
## data: abund_t[abund_t$Taxonomy == "Symbiotroph", ]$Abundance
## W = 0.92837, p-value = 0.0003521
bartlett.test(Abundance ~ Species, abund_t[abund_t$Taxonomy == "Symbiotroph",]) # variances are not equal
##
## Bartlett test of homogeneity of variances
##
## data: Abundance by Species
## Bartlett's K-squared = 9.9411, df = 3, p-value = 0.01907
kruskal_effsize(abund_t[abund_t$Taxonomy == "Symbiotroph",], Abundance ~ Species )
## # A tibble: 1 × 5
## .y. n effsize method magnitude
## * <chr> <int> <dbl> <chr> <ord>
## 1 Abundance 76 0.172 eta2[H] large
res.kruskal <-kruskal_test(abund_t[abund_t$Taxonomy == "Symbiotroph",], Abundance ~ Species)
res.kruskal
## # A tibble: 1 × 6
## .y. n statistic df p method
## * <chr> <int> <dbl> <int> <dbl> <chr>
## 1 Abundance 76 15.4 3 0.00153 Kruskal-Wallis
pwc<-dunn_test(abund_t[abund_t$Taxonomy == "Symbiotroph",], Abundance ~ Species, p.adjust.method = "bonferroni")
pwc
## # A tibble: 6 × 9
## .y. group1 group2 n1 n2 statistic p p.adj p.adj.signif
## * <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <chr>
## 1 Abundance Diphasias… Pinus 19 19 3.86 1.12e-4 6.69e-4 ***
## 2 Abundance Diphasias… Soil 19 19 1.37 1.70e-1 1 e+0 ns
## 3 Abundance Diphasias… Vacci… 19 19 1.65 9.84e-2 5.90e-1 ns
## 4 Abundance Pinus Soil 19 19 -2.49 1.28e-2 7.66e-2 ns
## 5 Abundance Pinus Vacci… 19 19 -2.21 2.70e-2 1.62e-1 ns
## 6 Abundance Soil Vacci… 19 19 0.279 7.80e-1 1 e+0 ns
#read info table with the samples without replicates
sample_info_no_rep <- read.table("Sample_Info_noR.txt", header=T, row.names=1, check.names=F)
tax_table<-tax_tab
row.names(tax_table) <- tax_table$OTU
tax_table <- tax_table[, -1]
tax_table<- tax_table[order(rownames(tax_table)),]
# create a data frame from normalized data
counts_norm_df<-as.data.frame(vst_trans_count_tab)
row_names <- rownames(counts_norm_df)
counts_norm_df <-as.data.frame(counts_norm_df)
rownames(counts_norm_df) <- row_names
# create microeco file
meco_fungi <- microtable$new(sample_table = sample_info_no_rep, otu_table = counts_norm_df, tax_table = tax_table)
t1 <- trans_abund$new(dataset = meco_fungi, taxrank = "Guild", ntaxa = 15)
## No taxa_abund list found. Calculate relative abundance with cal_abund function ...
## The result is stored in object$taxa_abund ...
## The transformed abundance data is stored in object$data_abund ...
Guilds <- t1$plot_box(group = "Species", xtext_angle = 30, color_values = scolor) + ylab("Relative abundance (%)")
Guilds
abund<-t1$data_abund
shapiro.test(abund[abund$Taxonomy == "Ectomycorrhizal",]$Abundance) # data not normally distributed
##
## Shapiro-Wilk normality test
##
## data: abund[abund$Taxonomy == "Ectomycorrhizal", ]$Abundance
## W = 0.96117, p-value = 0.02019
bartlett.test(Abundance ~ Species, abund[abund$Taxonomy == "Ectomycorrhizal",]) # variances are not equal
##
## Bartlett test of homogeneity of variances
##
## data: Abundance by Species
## Bartlett's K-squared = 11.005, df = 3, p-value = 0.0117
kruskal_effsize(abund[abund$Taxonomy == "Ectomycorrhizal",], Abundance ~ Species )
## # A tibble: 1 × 5
## .y. n effsize method magnitude
## * <chr> <int> <dbl> <chr> <ord>
## 1 Abundance 76 0.101 eta2[H] moderate
res.kruskal <-kruskal_test(abund[abund$Taxonomy == "Ectomycorrhizal",], Abundance ~ Species)
res.kruskal
## # A tibble: 1 × 6
## .y. n statistic df p method
## * <chr> <int> <dbl> <int> <dbl> <chr>
## 1 Abundance 76 10.3 3 0.0162 Kruskal-Wallis
pwc<-dunn_test(abund[abund$Taxonomy == "Ectomycorrhizal",], Abundance ~ Species, p.adjust.method = "bonferroni")
pwc
## # A tibble: 6 × 9
## .y. group1 group2 n1 n2 statistic p p.adj p.adj.signif
## * <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <chr>
## 1 Abundance Diphasiast… Pinus 19 19 3.13 0.00175 0.0105 *
## 2 Abundance Diphasiast… Soil 19 19 1.88 0.0600 0.360 ns
## 3 Abundance Diphasiast… Vacci… 19 19 1.18 0.240 1 ns
## 4 Abundance Pinus Soil 19 19 -1.25 0.212 1 ns
## 5 Abundance Pinus Vacci… 19 19 -1.95 0.0507 0.304 ns
## 6 Abundance Soil Vacci… 19 19 -0.705 0.481 1 ns
-> Pinus has more ECM than Diphasiastrum
shapiro.test(abund[abund$Taxonomy == "Saprotroph",]$Abundance) # data normally distributed
##
## Shapiro-Wilk normality test
##
## data: abund[abund$Taxonomy == "Saprotroph", ]$Abundance
## W = 0.96708, p-value = 0.04557
bartlett.test(Abundance ~ Species, abund[abund$Taxonomy == "Saprotroph",]) # variances are equal
##
## Bartlett test of homogeneity of variances
##
## data: Abundance by Species
## Bartlett's K-squared = 11.619, df = 3, p-value = 0.008808
TukeySapro <- TukeyHSD(aov(Abundance ~ Species, abund[abund$Taxonomy == "Saprotroph",], correction =))
## Warning: In lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...) :
## extra argument 'correction' will be disregarded
TukeySapro
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Abundance ~ Species, data = abund[abund$Taxonomy == "Saprotroph", ], correction = )
##
## $Species
## diff lwr upr p adj
## Pinus-Diphasiastrum 0.187951791 -4.4001312 4.7760347 0.9995482
## Soil-Diphasiastrum -4.998432579 -9.5865155 -0.4103496 0.0273615
## Vaccinium-Diphasiastrum -0.003747485 -4.5918304 4.5843355 1.0000000
## Soil-Pinus -5.186384370 -9.7744673 -0.5983014 0.0204558
## Vaccinium-Pinus -0.191699276 -4.7797822 4.3963837 0.9995207
## Vaccinium-Soil 4.994685094 0.4066021 9.5827681 0.0275181
-> Soil has more Saprotrophs than other sample types
#dataset only for Pinus:
tax_data <-tax_tab
tax_data <- tax_data[tax_data$OTU %in% rownames(filt_count_tab), ] # delete the blank OTUs
counts_norm_df2<-counts_norm_df
rownames(counts_norm_df2) <- NULL
df <- cbind(OTU= row_names, counts_norm_df2) # Add row names as the first column
# Select all samples ans OTU column:
columns_to_keep <- c(TRUE, !grepl("[_]", names(df)[-1]))
df_all <- df[, columns_to_keep, drop = FALSE]
sample_info_no_R <- sample_info_no_rep
sample_data <- sample_info_no_R
sample_data <- sample_info_no_R[sample_info_no_R$Species == "Pinus",]
# Combine the tables sorting by OTU
Tax_otu_data <- left_join(df, tax_data, by = "OTU" )
All_table<-Tax_otu_data
#write.table(All_table, file = "OTU_count_DIPH.txt", append = FALSE, quote = FALSE, sep = " ",
# eol = "\n", na = "NA", dec = ".", row.names = FALSE,
# col.names = TRUE, qmethod = c("escape", "double"),
# fileEncoding = "")
sample_info_no_R <- read.table("Sample_Info_noR.txt", header=T, check.names=F)
Age<-sort(sample_info_no_R$Age)
plot(Age)
sample_info_no_R$Age_group <- ifelse(sample_info_no_R$Age < 100, "young",
ifelse(sample_info_no_R$Age < 110, "average", "old"))
tax_data <-tax_tab
tax_data <- tax_data[tax_data$OTU %in% rownames(filt_count_tab), ] # delete the blank OTUs
counts_norm_df2<-counts_norm_df
rownames(counts_norm_df2) <- NULL
df <- cbind(OTU= row_names, counts_norm_df2) # Add row names as the first column
sample_data <- sample_info_no_R
# Combine the tables sorting by OTU
Tax_otu_data <- left_join(df, tax_data, by = "OTU" )
Tax_otu_data <- subset(Tax_otu_data , select = -grep("_", names(Tax_otu_data ))) #delete technical replicates
#converting to taxmap format
obj <- parse_tax_data(Tax_otu_data,
class_cols = "taxonomy", # The column in the input table
class_sep = ";") # What each taxon is separated by
#remove unclassified taxa:
obj <- filter_taxa(obj, taxon_names != "-")
#For the heat tree comparison of abundances
obj$data$tax_abund <- calc_taxon_abund(obj, "tax_data", cols = sample_data$Sample)
## Summing per-taxon counts from 76 columns for 341 taxa
obj$data$tax_occ <- calc_n_samples(obj, "tax_abund", groups = sample_data$Age_group, cols = sample_data$Sample)
## Calculating number of samples with a value greater than 0 for 76 columns in 3 groups for 341 observations
obj$data$diff_table <- compare_groups(obj, data= "tax_abund",
cols = sample_data$Sample, # What columns of sample data to use
groups = sample_data$Age_group) # What category each sample is assigned to
#We then need to correct for multiple comparisons and set non-significant differences to zero:
obj <- mutate_obs(obj, "diff_table",
wilcox_p_value = p.adjust(wilcox_p_value, method = "fdr")) #false discovery rate
obj$data$diff_table$log2_median_ratio[obj$data$diff_table$wilcox_p_value > 0.05] <- 0
obj$data$diff_table
## # A tibble: 1,023 × 7
## taxon_id treatment_1 treatment_2 log2_median_ratio median_diff mean_diff
## <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 ab average old 0 -30.7 -39.1
## 2 ac average old 0 -22.3 -11.5
## 3 ad average old 0 -11.7 -7.56
## 4 ae average old 0 -15.2 -8.33
## 5 af average old 0 -6.99 -6.23
## 6 ag average old 0 0 -0.296
## 7 ah average old 0 -0.151 -0.596
## 8 ai average old 0 -5.40 -4.22
## 9 ak average old 0 0 -0.0455
## 10 al average old 0 0 -0.0458
## # ℹ 1,013 more rows
## # ℹ 1 more variable: wilcox_p_value <dbl>
range(obj$data$diff_table$wilcox_p_value, finite = TRUE) #check if there are significant differences
## [1] 0.09840729 1.00000000
No significant difference
sample_info_no_R$Age_group <- ifelse(sample_info_no_R$Age < mean(sample_info_no_R$Age), "young", "old" )
sample_info_no_R$Age_group <- ifelse(sample_info_no_R$Age < 100, "young", "old" )
tax_data <-tax_tab
tax_data <- tax_data[tax_data$OTU %in% rownames(filt_count_tab), ] # delete the blank OTUs
counts_norm_df2<-counts_norm_df
rownames(counts_norm_df2) <- NULL
df <- cbind(OTU= row_names, counts_norm_df2) # Add row names as the first column
# Select only Pinus samples:
columns_to_keep <- c(TRUE, !grepl("[DVS]", names(df)[-1])) # Including the first column
df_P <- df[, columns_to_keep, drop = FALSE] # Subset dataframe with selected columns
sample_data <- sample_info_no_R
sample_data <- sample_info_no_R[sample_info_no_R$Species == "Pinus",]
# Combine the tables sorting by OTU
Tax_otu_data <- left_join(df_P, tax_data, by = "OTU" )
Tax_otu_data <- subset(Tax_otu_data , select = -grep("_", names(Tax_otu_data ))) #delete technical replicates
#converting to taxmap format
obj <- parse_tax_data(Tax_otu_data,
class_cols = "taxonomy", # The column in the input table
class_sep = ";") # What each taxon is separated by
#remove unclassified taxa:
obj <- filter_taxa(obj, taxon_names != "-")
#For the heat tree comparison of abundances
obj$data$tax_abund <- calc_taxon_abund(obj, "tax_data", cols = sample_data$Sample)
## Summing per-taxon counts from 19 columns for 341 taxa
obj$data$tax_occ <- calc_n_samples(obj, "tax_abund", groups = sample_data$Age_group, cols = sample_data$Sample)
## Calculating number of samples with a value greater than 0 for 19 columns in 2 groups for 341 observations
obj$data$diff_table <- compare_groups(obj, data= "tax_abund",
cols = sample_data$Sample, # What columns of sample data to use
groups = sample_data$Age_group) # What category each sample is assigned to
#We then need to correct for multiple comparisons and set non-significant differences to zero:
obj <- mutate_obs(obj, "diff_table",
wilcox_p_value = p.adjust(wilcox_p_value, method = "fdr")) #false discovery rate
obj$data$diff_table$log2_median_ratio[obj$data$diff_table$wilcox_p_value > 0.05] <- 0
range(obj$data$diff_table$wilcox_p_value, finite = TRUE)
## [1] 0.3429964 1.0000000
No significant differences
Height<-sort(sample_info_no_R$Height)
plot(Height)
sample_info_no_R$Height_group <- ifelse(sample_info_no_R$Height < mean(sample_info_no_R$Height), "small", "big" )
tax_data <-tax_tab
tax_data <- tax_data[tax_data$OTU %in% rownames(filt_count_tab), ] # delete the blank OTUs
counts_norm_df2<-counts_norm_df
rownames(counts_norm_df2) <- NULL
df <- cbind(OTU= row_names, counts_norm_df2) # Add row names as the first column
sample_data <- sample_info_no_R
# Combine the tables sorting by OTU
Tax_otu_data <- left_join(df, tax_data, by = "OTU" )
Tax_otu_data <- subset(Tax_otu_data , select = -grep("_", names(Tax_otu_data ))) #delete technical replicates
#converting to taxmap format
obj <- parse_tax_data(Tax_otu_data,
class_cols = "taxonomy", # The column in the input table
class_sep = ";") # What each taxon is separated by
#remove unclassified taxa:
obj <- filter_taxa(obj, taxon_names != "-")
#For the heat tree comparison of abundances
obj$data$tax_abund <- calc_taxon_abund(obj, "tax_data", cols = sample_data$Sample)
## Summing per-taxon counts from 76 columns for 341 taxa
obj$data$tax_occ <- calc_n_samples(obj, "tax_abund", groups = sample_data$Height_group, cols = sample_data$Sample)
## Calculating number of samples with a value greater than 0 for 76 columns in 2 groups for 341 observations
obj$data$diff_table <- compare_groups(obj, data= "tax_abund",
cols = sample_data$Sample, # What columns of sample data to use
groups = sample_data$Height_group) # What category each sample is assigned to
obj <- mutate_obs(obj, "diff_table", wilcox_p_value = p.adjust(wilcox_p_value, method = "fdr"))
obj$data$diff_table$log2_median_ratio[obj$data$diff_table$wilcox_p_value > 0.05] <- 0
range(obj$data$diff_table$wilcox_p_value, finite = TRUE)
## [1] 0.5460561 1.0000000
No significant differences
sample_info_no_R$Height_group <- ifelse(sample_info_no_R$Height < mean(sample_info_no_R$Height), "small", "big" )
tax_data <-tax_tab
tax_data <- tax_data[tax_data$OTU %in% rownames(filt_count_tab), ] # delete the blank OTUs
counts_norm_df2<-counts_norm_df
rownames(counts_norm_df2) <- NULL
df <- cbind(OTU= row_names, counts_norm_df2) # Add row names as the first column
# Select only Pinus samples ans OTU column:
columns_to_keep <- c(TRUE, !grepl("[DVS_]", names(df)[-1]))
df_P <- df[, columns_to_keep, drop = FALSE]
sample_data <- sample_info_no_R
sample_data <- sample_info_no_R[sample_info_no_R$Species == "Pinus",]
# Combine the tables sorting by OTU
Tax_otu_data <- left_join(df_P, tax_data, by = "OTU" )
Tax_otu_data <- subset(Tax_otu_data , select = -grep("_", names(Tax_otu_data ))) #delete technical replicates
#converting to taxmap format
obj <- parse_tax_data(Tax_otu_data,
class_cols = "taxonomy", # The column in the input table
class_sep = ";") # What each taxon is separated by
#remove unclassified taxa:
obj <- filter_taxa(obj, taxon_names != "-")
#For the heat tree comparison of abundances
obj$data$tax_abund <- calc_taxon_abund(obj, "tax_data", cols = sample_data$Sample)
## Summing per-taxon counts from 19 columns for 341 taxa
obj$data$tax_occ <- calc_n_samples(obj, "tax_abund", groups = sample_data$Height_group, cols = sample_data$Sample)
## Calculating number of samples with a value greater than 0 for 19 columns in 2 groups for 341 observations
obj$data$diff_table <- compare_groups(obj, data= "tax_abund",
cols = sample_data$Sample, # What columns of sample data to use
groups = sample_data$Height_group) # What category each sample is assigned to
obj <- mutate_obs(obj, "diff_table", wilcox_p_value = p.adjust(wilcox_p_value, method = "fdr"))
obj$data$diff_table$log2_median_ratio[obj$data$diff_table$wilcox_p_value > 0.05] <- 0
range(obj$data$diff_table$wilcox_p_value, finite = TRUE)
## [1] 0.1323042 1.0000000
-> No significant differences
tax_data <-tax_tab
tax_data <- tax_data[tax_data$OTU %in% rownames(filt_count_tab), ] # delete the blank OTUs
counts_norm_df<-as.data.frame(vst_trans_count_tab)
counts_norm_df2<-counts_norm_df
row_names <- rownames(counts_norm_df)
rownames(counts_norm_df2) <- NULL
counts_norm_df<-as.data.frame(vst_trans_count_tab)
df <- cbind(OTU= row_names, counts_norm_df2) # Add row names as the first column
# Select only Diphasiastrum samples and OTU column:
columns_to_keep <- c(TRUE, !grepl("[SPV_]", names(df)[-1]))
df_D <- df[, columns_to_keep, drop = FALSE]
sample_info_no_R <- sample_info_no_rep
sample_data <- sample_info_no_R
sample_data <- sample_info_no_R[sample_info_no_R$Species == "Diphasiastrum",]
# Combine the tables sorting by OTU
Tax_otu_data <- left_join(df_D, tax_data, by = "OTU" )
D_table<-Tax_otu_data[,1:20]
sample_info_no_R <- read.table("Sample_Info_noR.txt", header=T, check.names=F)
colony<-sort(sample_info_no_R$Diph_colony)
plot(colony)
sample_info_no_R$Diph_colony <- ifelse(sample_info_no_R$Diph_colony < 10, "small",
ifelse(sample_info_no_R$Diph_olony < 15, "average", "big"))
#converting to taxmap format
obj <- parse_tax_data(Tax_otu_data,
class_cols = "taxonomy", # The column in the input table
class_sep = ";") # What each taxon is separated by
#remove unclassified taxa:
obj <- filter_taxa(obj, taxon_names != "-")
#For the heat tree comparison of abundances
obj$data$tax_abund <- calc_taxon_abund(obj, "tax_data", cols = rownames(sample_data))
## Summing per-taxon counts from 19 columns for 341 taxa
obj$data$tax_occ <- calc_n_samples(obj, "tax_abund", groups = sample_data$Diph_colony, cols = rownames(sample_data))
## Calculating number of samples with a value greater than 0 for 19 columns in 16 groups for 341 observations
obj$data$diff_table <- compare_groups(obj, data= "tax_abund",
cols = rownames(sample_data), # What columns of sample data to use
groups = sample_data$Diph_colony) # What category each sample is assigned to
#We then need to correct for multiple comparisons and set non-significant differences to zero:
obj <- mutate_obs(obj, "diff_table",
wilcox_p_value = p.adjust(wilcox_p_value, method = "fdr")) #false discovery rate
obj$data$diff_table$log2_median_ratio[obj$data$diff_table$wilcox_p_value > 0.05] <- 0
range(obj$data$diff_table$wilcox_p_value, finite = TRUE) #check if there are significant differences
## [1] 1 1
No significant differences